Clustering

Histogram of degrees

Trajectory analysis

#for each ID, find the minimum round (1 to 10) for which degree is 0
sample.i = clust.res[clust.res$degree<=2 & clust.res$round!=0,]
sample.i$round = as.numeric(sample.i$round) - 1
isolationRound <- aggregate(round ~ ID, data = sample.i, FUN = min, na.rm = TRUE)
colnames(isolationRound)[c(1:2)] = c("ID","roundZero")

#subtract the minimum round for which degree is 0 to obtain rounds to isolation
sample.traj = merge(clust.res[clust.res$round!=0,], isolationRound, by="ID", all.x=TRUE)
sample.traj$round = as.numeric(sample.traj$round) - 1
sample.traj$roundToIsolation = sample.traj$round - sample.traj$roundZero

#remove all "positive" rounds
sample.traj = subset(sample.traj, sample.traj$roundToIsolation<=0)


#trajectories
sum_traj <- sample.traj %>% 
  group_by(roundToIsolation) %>%
  summarise(mean = mean(degree, na.rm = TRUE),
            ci   = 1.96 * sd(degree, na.rm = TRUE)/sqrt(n()))
sum_traj %>%
  ggplot(aes(x = roundToIsolation, y = mean)) +
  geom_line(aes()) +
  geom_errorbar(aes(ymin = mean - ci, ymax = mean + ci),
                width = .1) +
  theme_bw() +
  geom_point(size = 4) +
  labs(
    title = "Trajectories of degrees",
    x = "Round to isolation",
    y = "Degree",
    color = NULL,
    linetype = NULL) +
  theme(plot.title = element_text(hjust = 0.5))

Comparison of degree trajectory and other variables

#Plot the degree trajectory (rounds to isolation) and another variable on the same graph
sample.cc = sample.traj

sum_traj <- sample.cc %>% 
  group_by(roundToIsolation) %>%
  summarise(mean.degree = mean(degree),
            ci.degree   = 1.96 * sd(degree)/sqrt(n()))

sum_coef <- sample.cc %>% 
  group_by(roundToIsolation) %>%
  summarise(mean = mean(coefLocal, na.rm=TRUE),
            ci   = 1.96 * sd(coefLocal, na.rm=TRUE)/sqrt(n()))

sum_alterPrevWealth <- sample.cc %>% 
  group_by(roundToIsolation) %>%
  summarise(mean = mean(alterPrevWealth, na.rm=TRUE),
            ci   = 1.96 * sd(alterPrevWealth, na.rm=TRUE)/sqrt(n()))

sum_egoPrevWealth <- sample.cc %>% 
  group_by(roundToIsolation) %>%
  summarise(mean = mean(egoPrevWealth, na.rm=TRUE),
            ci   = 1.96 * sd(egoPrevWealth, na.rm=TRUE)/sqrt(n()))

sum_wealthDiff <- sample.cc %>% 
  group_by(roundToIsolation) %>%
  summarise(mean = mean(wealthDiff, na.rm=TRUE),
            ci   = 1.96 * sd(wealthDiff, na.rm=TRUE)/sqrt(n()))

sum_alterBehavior <- sample.cc %>% 
  group_by(roundToIsolation) %>%
  summarise(mean = mean(alterBehavior, na.rm=TRUE),
            ci   = 1.96 * sd(alterBehavior, na.rm=TRUE)/sqrt(n()))

sample.cc <- sample.cc %>%
  mutate(behavior = 
           case_when(
             behavior=="C" ~ 1,
             behavior=="D" ~ 0
           ))

sum_behavior <- sample.cc %>% 
  group_by(roundToIsolation) %>%
  summarise(mean = mean(behavior, na.rm=TRUE),
            ci   = 1.96 * sd(behavior, na.rm=TRUE)/sqrt(n()))






cc.compare = merge(data.frame(sum_traj),data.frame(sum_wealthDiff),by="roundToIsolation")

scale=100

g.compare = ggplot(data=cc.compare, aes(x=roundToIsolation,y=mean.degree)) +
  geom_line(aes(color="Degree")) +
  geom_errorbar(aes(x = roundToIsolation, ymin = mean.degree - ci.degree, ymax = mean.degree + ci.degree, color="Degree"),width = .1) +
  geom_point(aes(x = roundToIsolation, y=mean.degree, color="Degree"),size = 4) +
  geom_line(aes(y = mean/scale, color="Average wealth difference in previous round")) +
  geom_errorbar(aes(x = roundToIsolation, ymin = mean/scale - ci/scale, ymax = mean/scale + ci/scale, color="Average wealth difference in previous round"),width = .1) +
  geom_point(aes(x = roundToIsolation, y=mean/scale, color="Average wealth difference in previous round"),size = 4) +
  scale_y_continuous(
    # Features of the first axis
    name = "Degree",
    # Add a second axis and specify its features
    sec.axis = sec_axis(~.*scale, name="Average wealth difference in previous round")) +
  labs(
    title = "Trajectories of degrees and wealth difference",
    x = "Round to isolation",
    y = "Degree",
    color = "") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_color_manual(values = c("orange2", "gray30"))

g.compare

Does inequality cause isolation?

###does initial behavior predict whether someone will be ever isolated?
sample.reg = clust
sample.reg$isolated = ifelse(sample.reg$degree.1<=2 | sample.reg$degree.2<=2 | sample.reg$degree.3<=2 | sample.reg$degree.4<=2 | sample.reg$degree.5<=2|
                        sample.reg$degree.6<=2 | sample.reg$degree.7<=2 | sample.reg$degree.8<=2 | sample.reg$degree.9<=2 | sample.reg$degree.10<=2,
                        1,0)
sample.reg$inequality = relevel(factor(sample.reg$inequality),ref="none")

#inequality
reg.everIsolated.inequality = miceadds::glm.cluster(isolated ~ inequality*visibleWealth, 
      data = sample.reg, 
      cluster = "gameID",
      family = binomial(link = "logit"))
alpha <- 0.05
x.everIsolated.inequality <- summary(reg.everIsolated.inequality)
##                                Estimate Std. Error    z value     Pr(>|z|)
## (Intercept)                  -2.9831535  0.4345736 -6.8645527 6.669990e-12
## inequalityhigh                1.1763561  0.5391228  2.1819816 2.911089e-02
## inequalitylow                 0.5718801  0.4965976  1.1515965 2.494869e-01
## visibleWealth                 0.6049310  0.5216184  1.1597193 2.461631e-01
## inequalityhigh:visibleWealth -0.8844956  0.6346571 -1.3936590 1.634206e-01
## inequalitylow:visibleWealth  -0.3721900  0.6069544 -0.6132092 5.397380e-01
y.everIsolated.inequality <- confint(reg.everIsolated.inequality, level=1-alpha)
coef.everIsolated.inequality <- x.everIsolated.inequality[,1]
LowerCL.everIsolated.inequality <- y.everIsolated.inequality[,1]
UpperCL.everIsolated.inequality <- y.everIsolated.inequality[,2]
p.everIsolated.inequality <-x.everIsolated.inequality[,4]
#odds ratio
z.everIsolated.inequality <- rbind(
  cbind(exp(coef.everIsolated.inequality["inequalityhigh"]), exp(LowerCL.everIsolated.inequality["inequalityhigh"]), exp(UpperCL.everIsolated.inequality["inequalityhigh"]), p.everIsolated.inequality["inequalityhigh"]),
  cbind(exp(coef.everIsolated.inequality["inequalitylow"]), exp(LowerCL.everIsolated.inequality["inequalitylow"]), exp(UpperCL.everIsolated.inequality["inequalitylow"]), p.everIsolated.inequality["inequalitylow"]),
  cbind(exp(coef.everIsolated.inequality["visibleWealth"]), exp(LowerCL.everIsolated.inequality["visibleWealth"]), exp(UpperCL.everIsolated.inequality["visibleWealth"]), p.everIsolated.inequality["visibleWealth"]),
  cbind(exp(coef.everIsolated.inequality["inequalityhigh:visibleWealth"]), exp(LowerCL.everIsolated.inequality["inequalityhigh:visibleWealth"]), exp(UpperCL.everIsolated.inequality["inequalityhigh:visibleWealth"]), p.everIsolated.inequality["inequalityhigh:visibleWealth"]),
  cbind(exp(coef.everIsolated.inequality["inequalitylow:visibleWealth"]), exp(LowerCL.everIsolated.inequality["inequalitylow:visibleWealth"]), exp(UpperCL.everIsolated.inequality["inequalitylow:visibleWealth"]), p.everIsolated.inequality["inequalitylow:visibleWealth"])
)
z.everIsolated.inequality = data.frame(z.everIsolated.inequality)
names(z.everIsolated.inequality) = c("OR", "Lower CL", "Upper CL", "P-value")
z.everIsolated.inequality
##                                     OR  Lower CL Upper CL    P-value
## inequalityhigh               3.2425373 1.1271528 9.327971 0.02911089
## inequalitylow                1.7715947 0.6693599 4.688879 0.24948690
## visibleWealth                1.8311258 0.6587426 5.090033 0.24616310
## inequalityhigh:visibleWealth 0.4129224 0.1190275 1.432483 0.16342059
## inequalitylow:visibleWealth  0.6892233 0.2097584 2.264647 0.53973801
#gini
reg.everIsolated.gini = miceadds::glm.cluster(isolated ~ gini*visibleWealth, 
      data = sample.reg, 
      cluster = "gameID",
      family = binomial(link = "logit"))
alpha <- 0.05
x.everIsolated.gini <- summary(reg.everIsolated.gini)
##                      Estimate Std. Error   z value     Pr(>|z|)
## (Intercept)        -2.9955952  0.3494693 -8.571841 1.018438e-17
## gini                2.9595094  1.3271235  2.230018 2.574624e-02
## visibleWealth       0.6487687  0.4219287  1.537627 1.241400e-01
## gini:visibleWealth -2.2453421  1.5495229 -1.449054 1.473226e-01
y.everIsolated.gini <- confint(reg.everIsolated.gini, level=1-alpha)
coef.everIsolated.gini <- x.everIsolated.gini[,1]
LowerCL.everIsolated.gini <- y.everIsolated.gini[,1]
UpperCL.everIsolated.gini <- y.everIsolated.gini[,2]
p.everIsolated.gini <-x.everIsolated.gini[,4]
#odds ratio
z.everIsolated.gini <- rbind(
  cbind(exp(coef.everIsolated.gini["gini"]), exp(LowerCL.everIsolated.gini["gini"]), exp(UpperCL.everIsolated.gini["gini"]), p.everIsolated.gini["gini"]),
  cbind(exp(coef.everIsolated.gini["visibleWealth"]), exp(LowerCL.everIsolated.gini["visibleWealth"]), exp(UpperCL.everIsolated.gini["visibleWealth"]), p.everIsolated.gini["visibleWealth"]),
  cbind(exp(coef.everIsolated.gini["gini:visibleWealth"]), exp(LowerCL.everIsolated.gini["gini:visibleWealth"]), exp(UpperCL.everIsolated.gini["gini:visibleWealth"]), p.everIsolated.gini["gini:visibleWealth"])
)
z.everIsolated.gini = data.frame(z.everIsolated.gini)
names(z.everIsolated.gini) = c("OR", "Lower CL", "Upper CL", "P-value")
z.everIsolated.gini
##                            OR    Lower CL   Upper CL    P-value
## gini               19.2885062 1.431030906 259.984932 0.02574624
## visibleWealth       1.9131837 0.836778160   4.374244 0.12413996
## gini:visibleWealth  0.1058913 0.005080471   2.207073 0.14732257
#initScore
reg.everIsolated.initScore = miceadds::glm.cluster(isolated ~ initScore*visibleWealth, 
      data = sample.reg[sample.reg$inequality=="high",], 
      cluster = "gameID",
      family = binomial(link = "logit"))
alpha <- 0.05
x.everIsolated.initScore <- summary(reg.everIsolated.initScore)
##                              Estimate   Std. Error    z value     Pr(>|z|)
## (Intercept)             -1.8753386384 0.2981468924 -6.2899822 3.175024e-10
## initScore                0.0001287980 0.0005171585  0.2490494 8.033226e-01
## visibleWealth            0.0482782643 0.4680608749  0.1031453 9.178477e-01
## initScore:visibleWealth -0.0007704406 0.0010142138 -0.7596432 4.474679e-01
y.everIsolated.initScore <- confint(reg.everIsolated.initScore, level=1-alpha)
coef.everIsolated.initScore <- x.everIsolated.initScore[,1]
LowerCL.everIsolated.initScore <- y.everIsolated.initScore[,1]
UpperCL.everIsolated.initScore <- y.everIsolated.initScore[,2]
p.everIsolated.initScore <-x.everIsolated.initScore[,4]
#odds ratio
z.everIsolated.initScore <- rbind(
  cbind(exp(coef.everIsolated.initScore["initScore"]), exp(LowerCL.everIsolated.initScore["initScore"]), exp(UpperCL.everIsolated.initScore["initScore"]), p.everIsolated.initScore["initScore"]),
  cbind(exp(coef.everIsolated.initScore["visibleWealth"]), exp(LowerCL.everIsolated.initScore["visibleWealth"]), exp(UpperCL.everIsolated.initScore["visibleWealth"]), p.everIsolated.initScore["visibleWealth"]),
  cbind(exp(coef.everIsolated.initScore["initScore:visibleWealth"]), exp(LowerCL.everIsolated.initScore["initScore:visibleWealth"]), exp(UpperCL.everIsolated.initScore["initScore:visibleWealth"]), p.everIsolated.initScore["initScore:visibleWealth"])
)
z.everIsolated.initScore = data.frame(z.everIsolated.initScore)
names(z.everIsolated.initScore) = c("OR", "Lower CL", "Upper CL", "P-value")
z.everIsolated.initScore
##                                OR  Lower CL Upper CL   P-value
## initScore               1.0001288 0.9991156 1.001143 0.8033226
## visibleWealth           1.0494626 0.4193270 2.626522 0.9178477
## initScore:visibleWealth 0.9992299 0.9972455 1.001218 0.4474679

random forest model: wealth difference

ldata.rf = ldata[c("connecting","wealthDiff","visibleWealth","egoWealth","ego_behavior","alter_behavior","round")]
ldata.rf = ldata.rf[complete.cases(ldata.rf),]
ldata.rf$connecting = factor(ldata.rf$connecting)
ldata.rf$round = factor(ldata.rf$round)
ldata.rf = ldata.rf %>% mutate(round = factor(round, labels = make.names(levels(round))))
ldata.rf$ego_behavior=as.factor(ifelse(ldata.rf$ego_behavior==1,"C","D"))
ldata.rf$alter_behavior=as.factor(ifelse(ldata.rf$alter_behavior==1,"C","D"))
ldata.rf$visibleWealth=as.factor(ldata.rf$visibleWealth)

set.seed(10)
control.rf <- trainControl(method='repeatedcv', 
                                 number=10, 
                                 repeats=3,
                                 search = 'random',
                                 classProbs=TRUE,
                                 summaryFunction = multiClassSummary,
                                 allowParallel = TRUE)

model.weights <- ifelse(ldata.rf$connecting == "breakLink",
                        (1/table(ldata.rf$connecting)[1]) * 0.5,
                        (1/table(ldata.rf$connecting)[2]) * 0.5)

rf <- caret::train(connecting ~ wealthDiff + visibleWealth + egoWealth + ego_behavior + alter_behavior + round,
                         data = ldata.rf,
                         method = 'rf',
                         metric = 'AUC',
                         tuneLength  = 30, 
                         verbose=TRUE,
                         weights=model.weights,
                         trControl = control.rf)
print(rf)
## Random Forest 
## 
## 8450 samples
##    6 predictor
##    2 classes: 'breakLink', 'notBreakLink' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 7605, 7604, 7605, 7604, 7605, 7605, ... 
## Resampling results across tuning parameters:
## 
##   mtry  logLoss    AUC        prAUC      Accuracy   Kappa      F1         Sensitivity  Specificity  Pos_Pred_Value  Neg_Pred_Value  Precision  Recall     Detection_Rate
##    1    0.9631347  0.8826105  0.5082646  0.7836688  0.0000000        NaN  0.0000000    1.0000000          NaN       0.7836688             NaN  0.0000000  0.0000000     
##    2    0.6282555  0.8887200  0.6147786  0.8579106  0.5321705  0.6161979  0.5293591    0.9486049    0.7405398       0.8796817       0.7405398  0.5293591  0.1145183     
##    3    0.5289096  0.8892341  0.6673758  0.8630386  0.5709312  0.6554806  0.6032206    0.9347614    0.7195897       0.8952107       0.7195897  0.6032206  0.1304950     
##    5    0.5062031  0.8861534  0.6799610  0.8595669  0.5705385  0.6585521  0.6263766    0.9239390    0.6957354       0.8996513       0.6957354  0.6263766  0.1355041     
##    6    0.5217612  0.8837314  0.6747018  0.8565697  0.5651794  0.6555052  0.6313027    0.9187547    0.6828564       0.9003427       0.6828564  0.6313027  0.1365696     
##    7    0.5172092  0.8811596  0.6680525  0.8530595  0.5560289  0.6487194  0.6276577    0.9152819    0.6724423       0.8991108       0.6724423  0.6276577  0.1357806     
##    8    0.5200527  0.8792128  0.6605318  0.8499428  0.5483137  0.6431316  0.6256530    0.9118584    0.6629423       0.8982954       0.6629423  0.6256530  0.1353470     
##   10    0.5571118  0.8743746  0.6528501  0.8447350  0.5340662  0.6323300  0.6176285    0.9074282    0.6488478       0.8958708       0.6488478  0.6176285  0.1336110     
##   11    0.5619592  0.8731077  0.6478467  0.8439072  0.5307848  0.6294931  0.6134370    0.9075292    0.6475558       0.8948617       0.6475558  0.6134370  0.1327038     
##   12    0.5621255  0.8720096  0.6422759  0.8415402  0.5244085  0.6246832  0.6099712    0.9054652    0.6412885       0.8937994       0.6412885  0.6099712  0.1319542     
##   13    0.5709888  0.8713760  0.6399173  0.8415797  0.5244420  0.6246802  0.6101573    0.9054653    0.6411095       0.8938643       0.6411095  0.6101573  0.1319935     
##   14    0.5746133  0.8706794  0.6360188  0.8411455  0.5247496  0.6254326  0.6134330    0.9040052    0.6389073       0.8944841       0.6389073  0.6134330  0.1327036     
##   Balanced_Accuracy
##   0.5000000        
##   0.7389820        
##   0.7689910        
##   0.7751578        
##   0.7750287        
##   0.7714698        
##   0.7687557        
##   0.7625283        
##   0.7604831        
##   0.7577182        
##   0.7578113        
##   0.7587191        
## 
## AUC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 3.
varImpPlot(rf$finalModel, type=2)

Partial dependence plot: do wealth differrence and wealth visiblity explain isolation?

pred.prob <- function(object, newdata) {
  pred <- predict(object, newdata, type="prob")
  prob.breakLink <- pred[, "breakLink"]
  mean(prob.breakLink)
}

pdp = partial(rf, pred.var = c("wealthDiff","visibleWealth","alter_behavior","egoWealth"), pred.fun = pred.prob, plot = FALSE)

pdp.df = data.frame(
  wealthDiff = pdp$wealthDiff,
  alter_behavior = pdp$alter_behavior,
  visibleWealth = pdp$visibleWealth,
  egoWealth = pdp$egoWealth,
  breakLink = pdp$yhat
)


# g.pdp <- ggplot(data=pdp.df,aes(x=wealthDiff,y=breakLink,color = factor(visibleWealth),shape=factor(alter_behavior)))+
#   geom_point() +
#   geom_line() +
#   ggtitle("Proportions of links broken by ego, by wealth difference and wealth visibility") +
#   xlab("Wealth difference (alter - ego)") +
#   ylab("Proportions of links broken") +
#   scale_color_discrete(name="Wealth visibility") + 
#   scale_shape(name="Behavior of alter") + 
#   theme_classic() +
#   theme(plot.title = element_text(hjust = 0.5)) 
# 
# g.pdp


g.visible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Visible" & pdp.df$alter_behavior=="D",], 
                         aes(x = wealthDiff, y = egoWealth, fill = breakLink)) + 
  geom_tile() +
  ggtitle("Proportions of links broken by ego, \nby wealth difference and ego's wealth, \nwhen wealth is visible, \ngiven that alter defected") +
  scale_x_continuous("Wealth difference (alter - ego)") + 
  scale_y_continuous("Wealth of ego") +  
  scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
  labs(fill="Proportion of links \nbroken by ego")

g.invisible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Not visible" & pdp.df$alter_behavior=="D",], 
                         aes(x = wealthDiff, y = egoWealth, fill = breakLink)) + 
  geom_tile() +
  ggtitle("Proportions of links broken by ego, \nby wealth difference and ego's wealth, \nwhen wealth is not visible, \ngiven that alter defected") +
  scale_x_continuous("Wealth difference (alter - ego)") + 
  scale_y_continuous("Wealth of ego") +  
  scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
  labs(fill="Proportion of links \nbroken by ego")

ggarrange(g.visible,g.invisible)

#rich and middle-class people become more generous when wealth is visible

Regression: greater wealth difference and visible wealth

ldata.reg = ldata[c("connecting","wealthDiff","visibleWealth","egoWealth","alter_behavior","round","gameID")]
ldata.reg = ldata.reg[complete.cases(ldata.reg),]
ldata.reg$connecting = ifelse(ldata.reg$connecting=="breakLink",1,0)
reg.link = miceadds::glm.cluster(connecting ~ wealthDiff*visibleWealth + egoWealth + alter_behavior + factor(round), 
      data = ldata.reg, 
      cluster = "gameID",
      family = binomial(link = "logit"))
summary(reg.link)
##                                      Estimate   Std. Error     z value     Pr(>|z|)
## (Intercept)                      7.155796e-01 1.985413e-01   3.6041855 3.131334e-04
## wealthDiff                       3.425646e-04 7.184800e-05   4.7679076 1.861492e-06
## visibleWealthVisible            -1.562342e-01 1.196633e-01  -1.3056157 1.916833e-01
## egoWealth                       -5.539615e-05 6.585905e-05  -0.8411319 4.002740e-01
## alter_behavior                  -3.446595e+00 1.676009e-01 -20.5642954 5.732649e-94
## factor(round)1                  -1.787068e-01 1.985790e-01  -0.8999281 3.681585e-01
## factor(round)2                  -3.115523e-01 1.886328e-01  -1.6516333 9.860932e-02
## factor(round)3                  -4.711658e-01 1.845068e-01  -2.5536509 1.066001e-02
## factor(round)4                  -2.725019e-01 1.794467e-01  -1.5185670 1.288715e-01
## factor(round)5                  -3.606493e-01 1.849159e-01  -1.9503420 5.113537e-02
## factor(round)6                  -4.337742e-01 2.143599e-01  -2.0235788 4.301351e-02
## factor(round)7                  -2.124201e-01 1.945856e-01  -1.0916538 2.749853e-01
## factor(round)8                  -6.917863e-01 2.111869e-01  -3.2757071 1.053978e-03
## factor(round)9                  -4.687630e-01 2.372295e-01  -1.9759895 4.815597e-02
## wealthDiff:visibleWealthVisible  2.047481e-05 1.062155e-04   0.1927667 8.471417e-01

random forest model: ego wealth and alter wealth

ldata.rf = ldata[c("connecting","visibleWealth","egoWealth","alterWealth","ego_behavior","alter_behavior","round")]
ldata.rf = ldata.rf[complete.cases(ldata.rf),]
ldata.rf$connecting = factor(ldata.rf$connecting)
ldata.rf$round = factor(ldata.rf$round)
ldata.rf = ldata.rf %>% mutate(round = factor(round, labels = make.names(levels(round))))
ldata.rf$ego_behavior=as.factor(ifelse(ldata.rf$ego_behavior==1,"C","D"))
ldata.rf$alter_behavior=as.factor(ifelse(ldata.rf$alter_behavior==1,"C","D"))
ldata.rf$visibleWealth=as.factor(ldata.rf$visibleWealth)

set.seed(10)
control.rf <- trainControl(method='repeatedcv', 
                           number=10, 
                           repeats=3,
                           search = 'random',
                           classProbs=TRUE,
                           summaryFunction = multiClassSummary,
                           allowParallel = TRUE)

model.weights <- ifelse(ldata.rf$connecting == "breakLink",
                        (1/table(ldata.rf$connecting)[1]) * 0.5,
                        (1/table(ldata.rf$connecting)[2]) * 0.5)

rf <- caret::train(connecting ~ alterWealth + visibleWealth + egoWealth + ego_behavior + alter_behavior + round,
                   data = ldata.rf,
                   method = 'rf',
                   metric = 'AUC',
                   tuneLength  = 30, 
                   verbose=TRUE,
                   weights=model.weights,
                   trControl = control.rf)
print(rf)
## Random Forest 
## 
## 8450 samples
##    6 predictor
##    2 classes: 'breakLink', 'notBreakLink' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 7605, 7604, 7605, 7604, 7605, 7605, ... 
## Resampling results across tuning parameters:
## 
##   mtry  logLoss    AUC        prAUC      Accuracy   Kappa      F1         Sensitivity  Specificity  Pos_Pred_Value  Neg_Pred_Value  Precision  Recall     Detection_Rate
##    1    0.9737396  0.8805074  0.5112470  0.7836688  0.0000000        NaN  0.0000000    1.0000000          NaN       0.7836688             NaN  0.0000000  0.0000000     
##    2    0.6289841  0.8880328  0.6145596  0.8563706  0.5193651  0.6028298  0.5054545    0.9532355    0.7497873       0.8748354       0.7497873  0.5054545  0.1093495     
##    3    0.5235274  0.8902504  0.6629115  0.8589754  0.5496430  0.6354366  0.5694770    0.9388896    0.7212131       0.8877577       0.7212131  0.5694770  0.1231966     
##    5    0.5082313  0.8874332  0.6782981  0.8556220  0.5579350  0.6483526  0.6161593    0.9217243    0.6856848       0.8969963       0.6856848  0.6161593  0.1332952     
##    6    0.5138224  0.8844749  0.6705949  0.8521905  0.5521839  0.6452618  0.6223593    0.9156336    0.6716022       0.8979011       0.6716022  0.6223593  0.1346368     
##    7    0.5420696  0.8813218  0.6660557  0.8467462  0.5384643  0.6352290  0.6179797    0.9098947    0.6554560       0.8962817       0.6554560  0.6179797  0.1336896     
##    8    0.5307923  0.8792238  0.6605041  0.8445764  0.5325949  0.6308217  0.6145149    0.9080827    0.6496237       0.8952135       0.6496237  0.6145149  0.1329399     
##   10    0.5467505  0.8754124  0.6497351  0.8394096  0.5185578  0.6201892  0.6066795    0.9036528    0.6359724       0.8928420       0.6359724  0.6066795  0.1312444     
##   11    0.5674095  0.8732148  0.6440282  0.8377521  0.5133347  0.6159973  0.6021137    0.9027973    0.6321312       0.8916359       0.6321312  0.6021137  0.1302577     
##   12    0.5666535  0.8719989  0.6406575  0.8367664  0.5095281  0.6127452  0.5975550    0.9027977    0.6301633       0.8905235       0.6301633  0.5975550  0.1292718     
##   13    0.5674477  0.8713465  0.6363574  0.8352278  0.5044546  0.6085925  0.5926350    0.9021936    0.6269483       0.8892590       0.6269483  0.5926350  0.1282066     
##   14    0.5820957  0.8697502  0.6327839  0.8342807  0.5027548  0.6076002  0.5937269    0.9006828    0.6236805       0.8893737       0.6236805  0.5937269  0.1284433     
##   Balanced_Accuracy
##   0.5000000        
##   0.7293450        
##   0.7541833        
##   0.7689418        
##   0.7689965        
##   0.7639372        
##   0.7612988        
##   0.7551661        
##   0.7524555        
##   0.7501764        
##   0.7474143        
##   0.7472048        
## 
## AUC was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 3.
varImpPlot(rf$finalModel, type=2)

Partial dependence plot: do ego vs. alter's wealth and wealth visiblity explain isolation?

pred.prob <- function(object, newdata) {
  pred <- predict(object, newdata, type="prob")
  prob.breakLink <- pred[, "breakLink"]
  mean(prob.breakLink)
}

pdp = partial(rf, pred.var = c("alterWealth","visibleWealth","alter_behavior","egoWealth"), pred.fun = pred.prob, plot = FALSE)

pdp.df = data.frame(
  alterWealth = pdp$alterWealth,
  alter_behavior = pdp$alter_behavior,
  visibleWealth = pdp$visibleWealth,
  egoWealth = pdp$egoWealth,
  breakLink = pdp$yhat
)


g.visible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Visible" & pdp.df$alter_behavior=="D",], 
                   aes(x = alterWealth, y = egoWealth, fill = breakLink)) + 
  geom_tile() +
  ggtitle("Proportions of links broken by ego, \nby ego and alter's wealth, \nwhen wealth is visible, \ngiven that alter defected") +
  scale_x_continuous("Wealth of alter") + 
  scale_y_continuous("Wealth of ego") +  
  scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
  labs(fill="Proportion of links \nbroken by ego")


g.invisible = ggplot(data = pdp.df[pdp.df$visibleWealth=="Not visible" & pdp.df$alter_behavior=="D",], 
                     aes(x = alterWealth, y = egoWealth, fill = breakLink)) + 
  geom_tile() +
  ggtitle("Proportions of links broken by ego, \nby ego and alter's wealth, \nwhen wealth is not visible, \ngiven that alter defected") +
  scale_x_continuous("Wealth of alter") + 
  scale_y_continuous("Wealth of ego") +  
  scale_fill_continuous(type = "viridis", limits=c(0.3,0.8)) +
  labs(fill="Proportion of links \nbroken by ego")

ggarrange(g.visible,g.invisible)